Importing data and packages

library(sf)
library(raster)
library(tidyverse)
library(magrittr)
# Read crime data to tibble
Crime_2019 <- read_csv("data/crime-2019.csv", 
                       col_types = list(ID = col_integer(),
                                        `Case Number` = col_character(),
                                        Date = col_datetime(format = "%m/%d/%Y %H:%M:%S %p"),
                                        Block = col_factor(),
                                        IUCR = col_factor(),
                                        `Primary Type` = col_factor(),
                                        Description = col_factor(),
                                        `Location Description` = col_factor(),
                                        Arrest = col_logical(),
                                        Domestic = col_logical(),
                                        Beat = col_factor(),
                                        District = col_factor(),
                                        Ward = col_integer(),
                                        `Community Area` = col_integer(),
                                        `X Coordinate` = col_double(),
                                        `Y Coordinate` = col_double(),
                                        Year = col_integer(),
                                        `Updated On` = col_datetime(format = "%m/%d/%Y %H:%M:%S %p"),
                                        Latitude = col_double(),
                                        Longitude = col_double(),
                                        Location = col_character()))
Crime_NA <- na.omit(Crime_2019) %>% tibble()
Crime_2019
## # A tibble: 260,430 x 22
##        ID `Case Number` Date                Block IUCR  `Primary Type`
##     <int> <chr>         <dttm>              <fct> <fct> <fct>         
##  1 1.22e7 JD444016      2019-10-03 14:00:00 003X~ 1150  DECEPTIVE PRA~
##  2 1.22e7 JD443965      2019-12-01 00:01:00 038X~ 1752  OFFENSE INVOL~
##  3 1.22e7 JD442995      2019-11-27 08:00:00 055X~ 5002  OTHER OFFENSE 
##  4 1.20e7 JD194800      2019-05-10 11:00:00 043X~ 1195  DECEPTIVE PRA~
##  5 1.19e7 JC532990      2019-12-03 13:08:00 027X~ 041A  BATTERY       
##  6 1.22e7 JD441586      2019-07-22 00:01:00 071X~ 1150  DECEPTIVE PRA~
##  7 1.22e7 JD399576      2019-12-07 12:33:00 066X~ 1544  SEX OFFENSE   
##  8 1.19e7 JC535200      2019-12-04 06:00:00 032X~ 1563  SEX OFFENSE   
##  9 1.18e7 JC358770      2019-07-21 18:35:00 041X~ 041A  BATTERY       
## 10 1.22e7 JD439375      2019-11-23 00:00:00 050X~ 0265  CRIMINAL SEXU~
## # ... with 260,420 more rows, and 16 more variables: Description <fct>,
## #   `Location Description` <fct>, Arrest <lgl>, Domestic <lgl>, Beat <fct>,
## #   District <fct>, Ward <int>, `Community Area` <int>, `FBI Code` <chr>, `X
## #   Coordinate` <dbl>, `Y Coordinate` <dbl>, Year <int>, `Updated On` <dttm>,
## #   Latitude <dbl>, Longitude <dbl>, Location <chr>
colnames(Crime_NA) %<>% strsplit(split = " ") %>% lapply(paste, collapse = "")
head(Crime_NA)
## # A tibble: 6 x 22
##       ID CaseNumber Date                Block IUCR  PrimaryType Description
##    <int> <chr>      <dttm>              <fct> <fct> <fct>       <fct>      
## 1 1.20e7 JD194800   2019-05-10 11:00:00 043X~ 1195  DECEPTIVE ~ FINANCIAL ~
## 2 1.19e7 JC532990   2019-12-03 13:08:00 027X~ 041A  BATTERY     AGGRAVATED~
## 3 1.22e7 JD399576   2019-12-07 12:33:00 066X~ 1544  SEX OFFENSE SEXUAL EXP~
## 4 1.19e7 JC535200   2019-12-04 06:00:00 032X~ 1563  SEX OFFENSE CRIMINAL S~
## 5 1.18e7 JC358770   2019-07-21 18:35:00 041X~ 041A  BATTERY     AGGRAVATED~
## 6 1.21e7 JD272123   2019-01-17 00:00:00 075X~ 1750  OFFENSE IN~ CHILD ABUSE
## # ... with 15 more variables: LocationDescription <fct>, Arrest <lgl>,
## #   Domestic <lgl>, Beat <fct>, District <fct>, Ward <int>,
## #   CommunityArea <int>, FBICode <chr>, XCoordinate <dbl>, YCoordinate <dbl>,
## #   Year <int>, UpdatedOn <dttm>, Latitude <dbl>, Longitude <dbl>,
## #   Location <chr>
# Load shapefile containing community areas
com_bounds <- st_read("data/community_areas.shp", quiet = TRUE)
com_bounds %<>% arrange(as.integer(area_numbe))
print(com_bounds, n = 3)
## Simple feature collection with 77 features and 9 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -87.94011 ymin: 41.64454 xmax: -87.52414 ymax: 42.02304
## geographic CRS: WGS84(DD)
## First 3 features:
##   area area_num_1 area_numbe comarea comarea_id   community perimeter
## 1    0          1          1       0          0 ROGERS PARK         0
## 2    0          2          2       0          0  WEST RIDGE         0
## 3    0          3          3       0          0      UPTOWN         0
##   shape_area shape_len                       geometry
## 1   51259902  34052.40 MULTIPOLYGON (((-87.65456 4...
## 2   98429095  43020.69 MULTIPOLYGON (((-87.68465 4...
## 3   65095643  46972.79 MULTIPOLYGON (((-87.64102 4...
plot(st_geometry(com_bounds))

Area numbers in com_bounds correspond to CommunityArea in Crime_NA, making plotting maps easier.

Crime_summary <- Crime_NA %>% select(CommunityArea) %>% table() %>% as.data.frame()
colnames(Crime_summary) <- c("CommunityArea", "Freq")
head(Crime_summary)
##   CommunityArea Freq
## 1             1 3991
## 2             2 3416
## 3             3 3283
## 4             4 1761
## 5             5 1238
## 6             6 5857
com_bounds$crime_summary <- Crime_summary$Freq
plot(com_bounds["crime_summary"], main = "Crime frequency by community area")

The map is less detailed as it only includes community areas, but its construction is easier and doesn’t require longitude and latitude coordinates (which are often missing).

Use geom_sf to make a ggplot. More details at: https://r-spatial.github.io/sf/articles/sf5.html and https://www.r-spatial.org/r/2018/10/25/ggplot2-sf-2.html

For colours I like to use colorspace: https://cran.r-project.org/web/packages/colorspace/vignettes/colorspace.html

p <- ggplot(com_bounds) + geom_sf(aes(fill = crime_summary, text = community)) + 
  ggtitle("Crime frequency by community area") +
  colorspace::scale_fill_continuous_sequential(5, palette = "Inferno") + theme_void() +
  guides(fill = guide_colorbar(title = "Frequency")) +
  scale_fill_gradientn(colors = colorspace::sequential_hcl(5, palette = "Inferno")[5:1], 
                       breaks = seq(0, 15000, 2500), limits = c(0, 15000))
p

Trying out interactive plots with plotly.

library(plotly)
p %>% ggplotly(tooltip = "text") %>% style(hoverlabel = list(bgcolor = "white"), hoveron = "fill")

Can use google map backgrounds with plotly and ggplot2. Used the following tutorials: https://www.littlemissdata.com/blog/maps https://cengel.github.io/R-spatial/mapping.html

library(ggmap)

register_google(key = "AIzaSyAv2lYIQ4Iw-Qu4bDPFsGB-Rdosvr_U208")

map_centre <- as.data.frame(geocode("chicago"))

basemap <- get_map(location=c(lon = map_centre$lon, lat = map_centre$lat), zoom=11)

p <- ggmap(basemap) + geom_sf(data = com_bounds, aes(fill = crime_summary, text = community, alpha = 0.3), inherit.aes = F) + 
  ggtitle("Crime frequency by community area") +
  colorspace::scale_fill_continuous_sequential(5, palette = "Inferno") + theme_void() +
  guides(fill = guide_colorbar(title = "Frequency")) +
  scale_fill_gradientn(colors = colorspace::sequential_hcl(5, palette = "Inferno")[5:1], 
                       breaks = seq(0, 15000, 2500), limits = c(0, 15000))
## Warning: Ignoring unknown aesthetics: text
p %>% ggplotly(tooltip = "text") %>% style(hoverlabel = list(bgcolor = "white"), hoveron = "fill")

Or instead can create interactive maps using leaflet package (but as far as I know doesn’t work with plotly). Tutorial: https://rstudio.github.io/leaflet/shapes.html.

library(leaflet)

leaflet(com_bounds) %>% addTiles() %>%
  addPolygons(color = "#444444", weight = 1, smoothFactor = 0.5,
              opacity = 1.0, fillOpacity = 0.4,
              fillColor = ~colorNumeric("inferno", crime_summary, reverse = T)(crime_summary),
              highlightOptions = highlightOptions(color = "white", weight = 2,
                                                  bringToFront = TRUE), label = com_bounds$community) %>% addLegend(pal  = colorNumeric("inferno", com_bounds$crime_summary, reverse = T), values = ~com_bounds$crime_summary, title = "Crimes") 
## Warning: sf layer has inconsistent datum (+proj=longlat +ellps=WGS84 +no_defs).
## Need '+proj=longlat +datum=WGS84'